Reading the dataset
# Load the track table from CSV, discard the columns at positions 2 to 4
# (their names are not shown in this report), and preview the first rows.
data <- read.csv("datasets/normalized_data.csv")
data <- data[, -(2:4)]
head(data)
## X danceability energy key loudness mode speechiness acousticness
## 1 8 0.720 0.6240 5 0.7579 0 0.0473 0.795
## 2 33 0.357 0.2160 3 0.6872 1 0.0343 0.767
## 3 42 0.257 0.3780 5 0.7480 1 0.0329 0.739
## 4 50 0.763 0.6210 2 0.7424 1 0.0374 0.655
## 5 58 0.608 0.0113 5 0.3194 1 0.0677 0.934
## 6 63 0.238 0.2800 3 0.7123 1 0.0307 0.943
## instrumentalness liveness valence tempo duration_ms time_signature
## 1 0.0000 0.488 0.887 0.4970 0.0349 4
## 2 0.0000 0.531 0.283 0.4953 0.0500 4
## 3 0.0001 0.120 0.385 0.3378 0.0430 4
## 4 0.0000 0.158 0.825 0.4279 0.0311 4
## 5 0.8120 0.107 0.150 0.4778 0.0387 4
## 6 0.0001 0.331 0.275 0.3199 0.0420 3
## chorus_hit sections target genre popularity
## 1 0.0702 8 1 6 24
## 2 0.0862 10 1 6 70
## 3 0.0657 11 0 7 45
## 4 0.0767 6 1 12 60
## 5 0.1182 8 0 6 38
## 6 0.0614 9 0 6 0
# Rescale a vector linearly so its minimum maps to 0 and its maximum to 1.
#
# Arguments:
#   x      Numeric vector to rescale.
#   na.rm  If TRUE, missing values are ignored when finding the minimum and
#          maximum and stay NA in the result. The default (FALSE) keeps the
#          previous behaviour: any NA in `x` makes the whole result NA.
#
# Returns a vector the same length as `x`. When every non-missing value is
# equal, the range is zero and zeros are returned instead of the NaN that
# dividing by zero would produce.
min_max_normalisation <- function(x, na.rm = FALSE) {
  lo <- min(x, na.rm = na.rm)
  hi <- max(x, na.rm = na.rm)
  span <- hi - lo
  # Constant input: avoid 0 / 0. `x - lo` is 0 everywhere and keeps NA slots.
  if (!is.na(span) && span == 0) {
    return(x - lo)
  }
  (x - lo) / span
}
Splitting dataset into decades
# Sort the rows by the X index column; the decade subsets below are taken
# as fixed row ranges of this ordered table.
data <- data[order(data[["X"]]), ]
head(data)
## X danceability energy key loudness mode speechiness acousticness
## 4814 1 0.417 0.620 3 0.7836 1 0.0403 0.490
## 4815 2 0.498 0.505 3 0.6940 1 0.0337 0.018
## 4816 3 0.657 0.649 5 0.6767 1 0.0380 0.846
## 4817 4 0.590 0.545 7 0.7018 0 0.1040 0.706
## 4818 5 0.515 0.765 11 0.8630 0 0.1240 0.857
## 4819 6 0.697 0.673 0 0.7299 1 0.0266 0.714
## instrumentalness liveness valence tempo duration_ms time_signature
## 4814 0.0000 0.0779 0.845 0.7690 0.0381 3
## 4815 0.1070 0.1760 0.797 0.4217 0.0478 4
## 4816 0.0000 0.1190 0.908 0.4802 0.0503 4
## 4817 0.0246 0.0610 0.967 0.4374 0.0344 4
## 4818 0.0009 0.2130 0.906 0.4748 0.0555 4
## 4819 0.9190 0.1220 0.778 0.4644 0.0367 4
## chorus_hit sections target genre popularity
## 4814 0.0761 9 1 2 23
## 4815 0.1127 10 0 12 63
## 4816 0.0859 12 0 2 19
## 4817 0.0571 8 0 3 67
## 4818 0.0503 14 0 3 54
## 4819 0.1512 7 0 5 70
# Cut the X-ordered table into six consecutive row ranges, one per decade.
# Naming note: `twentieth` holds the 2000s rows and `twentieth_1` the 2010s
# rows (see the section headings and axis labels further down).
sixties <- data[seq(1, 8642), ]
dim(sixties)
## [1] 8642 19
seventies <- data[seq(8643, 16408), ]
dim(seventies)
## [1] 7766 19
eighties <- data[seq(16409, 23316), ]
dim(eighties)
## [1] 6908 19
nineties <- data[seq(23317, 28836), ]
dim(nineties)
## [1] 5520 19
twentieth <- data[seq(28837, 34708), ]
dim(twentieth)
## [1] 5872 19
twentieth_1 <- data[seq(34709, 41106), ]
dim(twentieth_1)
## [1] 6398 19
Analysing the features of each decade
1960s
# Per-column summary statistics for the 1960s subset.
six <- summary(sixties)
print(six)
## X danceability energy key
## Min. : 1 Min. :0.0000 Min. :0.000576 Min. : 0.000
## 1st Qu.:2161 1st Qu.:0.3820 1st Qu.:0.281000 1st Qu.: 2.000
## Median :4322 Median :0.5010 Median :0.436000 Median : 5.000
## Mean :4322 Mean :0.4922 Mean :0.445210 Mean : 5.076
## 3rd Qu.:6482 3rd Qu.:0.6120 3rd Qu.:0.608000 3rd Qu.: 8.000
## Max. :8642 Max. :0.9220 Max. :0.995000 Max. :11.000
## loudness mode speechiness acousticness
## Min. :0.1436 Min. :0.0000 Min. :0.0000 Min. :0.0000054
## 1st Qu.:0.6577 1st Qu.:1.0000 1st Qu.:0.0323 1st Qu.:0.4080000
## Median :0.7180 Median :1.0000 Median :0.0387 Median :0.6830000
## Mean :0.7024 Mean :0.7556 Mean :0.0624 Mean :0.6155568
## 3rd Qu.:0.7669 3rd Qu.:1.0000 3rd Qu.:0.0529 3rd Qu.:0.8510000
## Max. :0.9198 Max. :1.0000 Max. :0.9600 Max. :0.9960000
## instrumentalness liveness valence tempo
## Min. :0.00000 Min. :0.0136 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.00000 1st Qu.:0.1030 1st Qu.:0.3610 1st Qu.:0.3882
## Median :0.00000 Median :0.1480 Median :0.6025 Median :0.4654
## Mean :0.14992 Mean :0.2135 Mean :0.5750 Mean :0.4769
## 3rd Qu.:0.03655 3rd Qu.:0.2780 3rd Qu.:0.8160 3rd Qu.:0.5464
## Max. :0.99900 Max. :0.9840 Max. :0.9930 Max. :0.9983
## duration_ms time_signature chorus_hit sections
## Min. :0.00000 Min. :0.000 Min. :0.00000 Min. : 0.000
## 1st Qu.:0.03030 1st Qu.:4.000 1st Qu.:0.06252 1st Qu.: 7.000
## Median :0.03550 Median :4.000 Median :0.08120 Median : 8.000
## Mean :0.04055 Mean :3.817 Mean :0.08989 Mean : 8.884
## 3rd Qu.:0.04288 3rd Qu.:4.000 3rd Qu.:0.10840 3rd Qu.: 10.000
## Max. :0.60210 Max. :5.000 Max. :0.43280 Max. :109.000
## target genre popularity
## Min. :0.0 Min. : 1.000 Min. : 0.0
## 1st Qu.:0.0 1st Qu.: 2.000 1st Qu.:34.0
## Median :0.5 Median : 3.000 Median :51.0
## Mean :0.5 Mean : 3.679 Mean :46.8
## 3rd Qu.:1.0 3rd Qu.: 6.000 3rd Qu.:62.0
## Max. :1.0 Max. :12.000 Max. :88.0
1970s
# Per-column summary statistics for the 1970s subset.
seven <- summary(seventies)
print(seven)
## X danceability energy key
## Min. : 8643 Min. :0.0630 Min. :0.0011 Min. : 0.000
## 1st Qu.:10584 1st Qu.:0.4150 1st Qu.:0.3540 1st Qu.: 2.000
## Median :12526 Median :0.5345 Median :0.5330 Median : 5.000
## Mean :12526 Mean :0.5255 Mean :0.5280 Mean : 5.208
## 3rd Qu.:14467 3rd Qu.:0.6460 3rd Qu.:0.7120 3rd Qu.: 9.000
## Max. :16408 Max. :0.9610 Max. :0.9990 Max. :11.000
## loudness mode speechiness acousticness
## Min. :0.1474 Min. :0.000 Min. :0.02250 Min. :0.0000014
## 1st Qu.:0.6639 1st Qu.:0.000 1st Qu.:0.03350 1st Qu.:0.1240000
## Median :0.7209 Median :1.000 Median :0.04160 Median :0.4090000
## Mean :0.7079 Mean :0.722 Mean :0.06205 Mean :0.4342403
## 3rd Qu.:0.7709 3rd Qu.:1.000 3rd Qu.:0.06048 3rd Qu.:0.7260000
## Max. :1.0000 Max. :1.000 Max. :0.95200 Max. :0.9960000
## instrumentalness liveness valence tempo
## Min. :0.0000 Min. :0.0146 Min. :0.000 Min. :0.1480
## 1st Qu.:0.0000 1st Qu.:0.0924 1st Qu.:0.397 1st Qu.:0.4074
## Median :0.0006 Median :0.1270 Median :0.638 Median :0.4863
## Mean :0.1622 Mean :0.1998 Mean :0.596 Mean :0.4930
## 3rd Qu.:0.1000 3rd Qu.:0.2430 3rd Qu.:0.824 3rd Qu.:0.5595
## Max. :0.9980 Max. :0.9990 Max. :0.990 Max. :1.0000
## duration_ms time_signature chorus_hit sections
## Min. :0.00130 Min. :1.000 Min. :0.00000 Min. : 0.00
## 1st Qu.:0.03803 1st Qu.:4.000 1st Qu.:0.06402 1st Qu.: 8.00
## Median :0.04710 Median :4.000 Median :0.08280 Median : 10.00
## Mean :0.05409 Mean :3.884 Mean :0.09199 Mean : 10.76
## 3rd Qu.:0.06010 3rd Qu.:4.000 3rd Qu.:0.10900 3rd Qu.: 12.00
## Max. :0.81250 Max. :5.000 Max. :0.50790 Max. :130.00
## target genre popularity
## Min. :0.0 Min. : 1.000 Min. : 0.00
## 1st Qu.:0.0 1st Qu.: 2.000 1st Qu.:37.00
## Median :0.5 Median : 3.000 Median :51.00
## Mean :0.5 Mean : 4.215 Mean :49.33
## 3rd Qu.:1.0 3rd Qu.: 6.000 3rd Qu.:65.00
## Max. :1.0 Max. :12.000 Max. :89.00
1980s
# Per-column summary statistics for the 1980s subset.
eight <- summary(eighties)
print(eight)
## X danceability energy key
## Min. :16409 Min. :0.0656 Min. :0.000276 Min. : 0.000
## 1st Qu.:18136 1st Qu.:0.4550 1st Qu.:0.436000 1st Qu.: 2.000
## Median :19863 Median :0.5820 Median :0.637000 Median : 5.000
## Mean :19863 Mean :0.5643 Mean :0.608037 Mean : 5.233
## 3rd Qu.:21589 3rd Qu.:0.6900 3rd Qu.:0.809000 3rd Qu.: 9.000
## Max. :23316 Max. :0.9880 Max. :1.000000 Max. :11.000
## loudness mode speechiness acousticness
## Min. :0.1757 Min. :0.0000 Min. :0.02230 Min. :0.0000014
## 1st Qu.:0.6755 1st Qu.:0.0000 1st Qu.:0.03270 1st Qu.:0.0333750
## Median :0.7282 Median :1.0000 Median :0.04060 Median :0.1720000
## Mean :0.7210 Mean :0.6876 Mean :0.05791 Mean :0.2930668
## 3rd Qu.:0.7852 3rd Qu.:1.0000 3rd Qu.:0.05800 3rd Qu.:0.4990000
## Max. :0.9165 Max. :1.0000 Max. :0.90300 Max. :0.9960000
## instrumentalness liveness valence tempo
## Min. :0.00000 Min. :0.0186 Min. :0.00001 Min. :0.1616
## 1st Qu.:0.00000 1st Qu.:0.0849 1st Qu.:0.38300 1st Qu.:0.4250
## Median :0.00030 Median :0.1310 Median :0.62200 Median :0.4952
## Mean :0.13986 Mean :0.2010 Mean :0.58792 Mean :0.4997
## 3rd Qu.:0.04525 3rd Qu.:0.2660 3rd Qu.:0.81100 3rd Qu.:0.5599
## Max. :1.00000 Max. :0.9970 Max. :0.99000 Max. :0.9005
## duration_ms time_signature chorus_hit sections
## Min. :0.00350 Min. :1.000 Min. :0.00000 Min. : 1.00
## 1st Qu.:0.04560 1st Qu.:4.000 1st Qu.:0.06330 1st Qu.: 9.00
## Median :0.05460 Median :4.000 Median :0.08210 Median :11.00
## Mean :0.05766 Mean :3.926 Mean :0.09178 Mean :11.28
## 3rd Qu.:0.06540 3rd Qu.:4.000 3rd Qu.:0.10920 3rd Qu.:13.00
## Max. :0.53160 Max. :5.000 Max. :1.00000 Max. :73.00
## target genre popularity
## Min. :0.0 Min. : 1.000 Min. : 0.00
## 1st Qu.:0.0 1st Qu.: 2.000 1st Qu.:42.00
## Median :0.5 Median : 3.000 Median :55.00
## Mean :0.5 Mean : 4.497 Mean :52.64
## 3rd Qu.:1.0 3rd Qu.: 7.000 3rd Qu.:68.00
## Max. :1.0 Max. :12.000 Max. :89.00
1990s
# Per-column summary statistics for the 1990s subset.
nine <- summary(nineties)
print(nine)
## X danceability energy key
## Min. :23317 Min. :0.0576 Min. :0.000357 Min. : 0.000
## 1st Qu.:24697 1st Qu.:0.4510 1st Qu.:0.435000 1st Qu.: 2.000
## Median :26077 Median :0.5860 Median :0.634500 Median : 5.000
## Mean :26077 Mean :0.5669 Mean :0.602246 Mean : 5.263
## 3rd Qu.:27456 3rd Qu.:0.6990 3rd Qu.:0.811000 3rd Qu.: 8.000
## Max. :28836 Max. :0.9790 Max. :0.998000 Max. :11.000
## loudness mode speechiness acousticness
## Min. :0.0000 Min. :0.0000 Min. :0.02200 Min. :0.0000
## 1st Qu.:0.6990 1st Qu.:0.0000 1st Qu.:0.03310 1st Qu.:0.0180
## Median :0.7578 Median :1.0000 Median :0.04320 Median :0.1400
## Mean :0.7362 Mean :0.6696 Mean :0.07400 Mean :0.2912
## 3rd Qu.:0.7993 3rd Qu.:1.0000 3rd Qu.:0.07283 3rd Qu.:0.5272
## Max. :0.9073 Max. :1.0000 Max. :0.95000 Max. :0.9960
## instrumentalness liveness valence tempo
## Min. :0.00000 Min. :0.01300 Min. :0.0000 Min. :0.1430
## 1st Qu.:0.00000 1st Qu.:0.08968 1st Qu.:0.3300 1st Qu.:0.3994
## Median :0.00020 Median :0.12700 Median :0.5560 Median :0.4762
## Mean :0.15988 Mean :0.19720 Mean :0.5377 Mean :0.4922
## 3rd Qu.:0.09137 3rd Qu.:0.25900 3rd Qu.:0.7510 3rd Qu.:0.5633
## Max. :0.99700 Max. :0.99200 Max. :0.9960 Max. :0.9027
## duration_ms time_signature chorus_hit sections
## Min. :0.00110 Min. :1.000 Min. :0.00000 Min. : 1.00
## 1st Qu.:0.04510 1st Qu.:4.000 1st Qu.:0.06480 1st Qu.: 9.00
## Median :0.05570 Median :4.000 Median :0.08430 Median :11.00
## Mean :0.05801 Mean :3.922 Mean :0.09465 Mean :11.13
## 3rd Qu.:0.06610 3rd Qu.:4.000 3rd Qu.:0.11230 3rd Qu.:13.00
## Max. :0.40830 Max. :5.000 Max. :0.54260 Max. :69.00
## target genre popularity
## Min. :0.0 Min. : 1.000 Min. : 0.00
## 1st Qu.:0.0 1st Qu.: 2.000 1st Qu.:39.00
## Median :0.5 Median : 3.000 Median :54.00
## Mean :0.5 Mean : 4.647 Mean :51.24
## 3rd Qu.:1.0 3rd Qu.: 7.000 3rd Qu.:68.00
## Max. :1.0 Max. :12.000 Max. :94.00
2000s
# Per-column summary statistics for the 2000s subset (`twentieth`).
twenty <- summary(twentieth)
print(twenty)
## X danceability energy key
## Min. :28837 Min. :0.0588 Min. :0.000348 Min. : 0.000
## 1st Qu.:30305 1st Qu.:0.4160 1st Qu.:0.567000 1st Qu.: 2.000
## Median :31773 Median :0.5560 Median :0.744000 Median : 5.000
## Mean :31773 Mean :0.5429 Mean :0.694511 Mean : 5.276
## 3rd Qu.:33240 3rd Qu.:0.6810 3rd Qu.:0.885000 3rd Qu.: 8.000
## Max. :34708 Max. :0.9860 Max. :0.999000 Max. :11.000
## loudness mode speechiness acousticness
## Min. :0.0363 Min. :0.0000 Min. :0.02240 Min. :0.000000
## 1st Qu.:0.7725 1st Qu.:0.0000 1st Qu.:0.03617 1st Qu.:0.004553
## Median :0.8154 Median :1.0000 Median :0.05270 Median :0.060300
## Mean :0.7888 Mean :0.6451 Mean :0.09236 Mean :0.214374
## 3rd Qu.:0.8433 3rd Qu.:1.0000 3rd Qu.:0.10700 3rd Qu.:0.312000
## Max. :0.9508 Max. :1.0000 Max. :0.95000 Max. :0.996000
## instrumentalness liveness valence tempo
## Min. :0.00000 Min. :0.0193 Min. :0.0000 Min. :0.1937
## 1st Qu.:0.00000 1st Qu.:0.0937 1st Qu.:0.2780 1st Qu.:0.4017
## Median :0.00000 Median :0.1310 Median :0.4860 Median :0.4970
## Mean :0.15092 Mean :0.1961 Mean :0.4823 Mean :0.5037
## 3rd Qu.:0.04727 3rd Qu.:0.2630 3rd Qu.:0.6870 3rd Qu.:0.5861
## Max. :0.99800 Max. :0.9870 Max. :0.9820 Max. :0.8832
## duration_ms time_signature chorus_hit sections
## Min. :0.00020 Min. :0.000 Min. :0.00000 Min. : 1.00
## 1st Qu.:0.04610 1st Qu.:4.000 1st Qu.:0.06350 1st Qu.: 9.00
## Median :0.05360 Median :4.000 Median :0.08320 Median : 10.00
## Mean :0.05848 Mean :3.914 Mean :0.09403 Mean : 11.06
## 3rd Qu.:0.06350 3rd Qu.:4.000 3rd Qu.:0.11052 3rd Qu.: 12.00
## Max. :1.00000 Max. :5.000 Max. :0.60620 Max. :169.00
## target genre popularity
## Min. :0.0 Min. : 1.000 Min. : 0.00
## 1st Qu.:0.0 1st Qu.: 3.000 1st Qu.: 40.75
## Median :0.5 Median : 3.000 Median : 60.00
## Mean :0.5 Mean : 5.332 Mean : 54.52
## 3rd Qu.:1.0 3rd Qu.: 9.000 3rd Qu.: 68.00
## Max. :1.0 Max. :12.000 Max. :100.00
2010s
# Per-column summary statistics for the 2010s subset (`twentieth_1`).
twentyone <- summary(twentieth_1)
print(twentyone)
## X danceability energy key
## Min. :34709 Min. :0.0622 Min. :0.000251 Min. : 0.000
## 1st Qu.:36308 1st Qu.:0.4470 1st Qu.:0.533000 1st Qu.: 2.000
## Median :37908 Median :0.5880 Median :0.712500 Median : 5.000
## Mean :37908 Mean :0.5682 Mean :0.667756 Mean : 5.284
## 3rd Qu.:39507 3rd Qu.:0.7100 3rd Qu.:0.857000 3rd Qu.: 8.000
## Max. :41106 Max. :0.9810 Max. :0.999000 Max. :11.000
## loudness mode speechiness acousticness
## Min. :0.0490 Min. :0.0000 Min. :0.02250 Min. :0.000000
## 1st Qu.:0.7703 1st Qu.:0.0000 1st Qu.:0.03882 1st Qu.:0.008533
## Median :0.8143 Median :1.0000 Median :0.05720 Median :0.067050
## Mean :0.7861 Mean :0.6455 Mean :0.09802 Mean :0.216928
## 3rd Qu.:0.8425 3rd Qu.:1.0000 3rd Qu.:0.11200 3rd Qu.:0.311000
## Max. :0.9265 Max. :1.0000 Max. :0.95600 Max. :0.996000
## instrumentalness liveness valence tempo
## Min. :0.00000 Min. :0.0167 Min. :0.0000 Min. :0.1631
## 1st Qu.:0.00000 1st Qu.:0.0968 1st Qu.:0.2400 1st Qu.:0.4063
## Median :0.00000 Median :0.1260 Median :0.4340 Median :0.5015
## Mean :0.16529 Mean :0.1967 Mean :0.4437 Mean :0.5068
## 3rd Qu.:0.05765 3rd Qu.:0.2490 3rd Qu.:0.6280 3rd Qu.:0.5844
## Max. :0.99500 Max. :0.9820 Max. :0.9760 Max. :0.8739
## duration_ms time_signature chorus_hit sections
## Min. :0.00350 Min. :0.000 Min. :0.00000 Min. : 2.00
## 1st Qu.:0.04283 1st Qu.:4.000 1st Qu.:0.06480 1st Qu.: 8.00
## Median :0.04960 Median :4.000 Median :0.08370 Median :10.00
## Mean :0.05332 Mean :3.931 Mean :0.09471 Mean :10.32
## 3rd Qu.:0.05878 3rd Qu.:4.000 3rd Qu.:0.11150 3rd Qu.:12.00
## Max. :0.41370 Max. :5.000 Max. :0.49210 Max. :88.00
## target genre popularity
## Min. :0.0 Min. : 1.000 Min. : 0.00
## 1st Qu.:0.0 1st Qu.: 3.000 1st Qu.: 48.00
## Median :0.5 Median : 3.000 Median : 68.00
## Mean :0.5 Mean : 5.296 Mean : 60.47
## 3rd Qu.:1.0 3rd Qu.: 9.000 3rd Qu.: 73.00
## Max. :1.0 Max. :12.000 Max. :100.00
Analysing the trend in the mean value of each feature across
decades
# Decade subsets in chronological order; the order must match the labels
# stored in `x` at the end of this block.
decade_frames <- list(sixties, seventies, eighties, nineties,
                      twentieth, twentieth_1)

# Mean of one column within each decade subset.
#
# Arguments:
#   column  Name of the column to average, as a string.
#
# Returns an unnamed numeric vector with one mean per decade, in the order
# of `decade_frames`.
decade_means <- function(column) {
  vapply(decade_frames, function(d) mean(d[[column]]), numeric(1))
}

artist_pop <- decade_means("popularity")
instrumental <- decade_means("instrumentalness")
dance <- decade_means("danceability")
energy <- decade_means("energy")
loud <- decade_means("loudness")
mode <- decade_means("mode")
speech <- decade_means("speechiness")
acoustic <- decade_means("acousticness")
live <- decade_means("liveness")
valence <- decade_means("valence")
tempo <- decade_means("tempo")
duration <- decade_means("duration_ms")
time_sig <- decade_means("time_signature")
genre <- decade_means("genre")

# Axis labels for the six decades, used as the x values of every chart.
x <- c("1960-70", "1970-80", "1980-90", "1990-2000", "2000-10", "2010-20")
library(plotly)
# One bar colour per decade, shared by every chart below.
decade_colours <- c("steelblue", "cornflowerblue",
                    "navy", "powderblue",
                    "darkslateblue", "lightskyblue")

# Build one plotly bar chart of a per-decade mean.
#
# Arguments:
#   plot_data  Data frame with columns `x` (decade label) and `y` (value).
#   title      Chart title.
#
# Returns the plotly figure, with the x axis titled "Decades" and the y axis
# left untitled.
decade_bar <- function(plot_data, title) {
  plot_ly(plot_data, x = ~x, y = ~y, type = "bar",
          marker = list(color = decade_colours)) %>%
    layout(title = title,
           xaxis = list(title = "Decades"),
           yaxis = list(title = ""))
}

# Each chart keeps its own data frame (data1..data14) and figure
# (fig1..fig14) so that later code referring to either still works.
data1 <- data.frame(x = x, y = artist_pop)
fig1 <- decade_bar(data1, "Popularity")
data2 <- data.frame(x = x, y = instrumental)
fig2 <- decade_bar(data2, "Instrumentalness")
data3 <- data.frame(x = x, y = dance)
fig3 <- decade_bar(data3, "Danceability")
data4 <- data.frame(x = x, y = speech)
fig4 <- decade_bar(data4, "Speechiness")
data5 <- data.frame(x = x, y = acoustic)
fig5 <- decade_bar(data5, "Acousticness")
data6 <- data.frame(x = x, y = live)
fig6 <- decade_bar(data6, "Liveness")
data7 <- data.frame(x = x, y = loud)
fig7 <- decade_bar(data7, "Loudness")
data8 <- data.frame(x = x, y = tempo)
fig8 <- decade_bar(data8, "Tempo")
data9 <- data.frame(x = x, y = energy)
fig9 <- decade_bar(data9, "Energy")
data10 <- data.frame(x = x, y = valence)
fig10 <- decade_bar(data10, "Valence")
data11 <- data.frame(x = x, y = mode)
fig11 <- decade_bar(data11, "Mode")
data12 <- data.frame(x = x, y = duration)
fig12 <- decade_bar(data12, "Duration")
data13 <- data.frame(x = x, y = genre)
fig13 <- decade_bar(data13, "Genre")
data14 <- data.frame(x = x, y = time_sig)
fig14 <- decade_bar(data14, "Time Signature")
# Display the fourteen per-decade bar charts built above, one after another.
# Each figure is named on its own line at top level so that it is
# auto-printed.
fig1
fig2
fig3
fig4
fig5
fig6
fig7
fig8
fig9
fig10
fig11
fig12
fig13
fig14
Analysing the influence of features on whether a song is a
hit
library(caret)
# Build the modelling table for the whole dataset: every predictor column
# followed by `target` as the last column. The `X` index and `target`
# columns are removed by name rather than by position, so a change in column
# order cannot silently drop the wrong column.
data_temp <- data[, !(names(data) %in% c("X", "target")), drop = FALSE]
data_temp <- cbind(data_temp, target = data$target)
df <- data_temp
# Lookup table from the numeric target code (as a string) to its label.
trans <- c("Miss", "Hit")
names(trans) <- c(0, 1)
# Overwrite `target` in place with the label as a factor, then preview.
df$target <- as.factor(trans[as.character(df$target)])
head(df)
## danceability energy key loudness mode speechiness acousticness
## 4814 0.417 0.620 3 0.7836 1 0.0403 0.490
## 4815 0.498 0.505 3 0.6940 1 0.0337 0.018
## 4816 0.657 0.649 5 0.6767 1 0.0380 0.846
## 4817 0.590 0.545 7 0.7018 0 0.1040 0.706
## 4818 0.515 0.765 11 0.8630 0 0.1240 0.857
## 4819 0.697 0.673 0 0.7299 1 0.0266 0.714
## instrumentalness liveness valence tempo duration_ms time_signature
## 4814 0.0000 0.0779 0.845 0.7690 0.0381 3
## 4815 0.1070 0.1760 0.797 0.4217 0.0478 4
## 4816 0.0000 0.1190 0.908 0.4802 0.0503 4
## 4817 0.0246 0.0610 0.967 0.4374 0.0344 4
## 4818 0.0009 0.2130 0.906 0.4748 0.0555 4
## 4819 0.9190 0.1220 0.778 0.4644 0.0367 4
## chorus_hit sections genre popularity target
## 4814 0.0761 9 2 23 Hit
## 4815 0.1127 10 12 63 Miss
## 4816 0.0859 12 2 19 Miss
## 4817 0.0571 8 3 67 Miss
## 4818 0.0503 14 3 54 Miss
## 4819 0.1512 7 5 70 Miss
# Confirm that the label column is a factor before scoring the predictors.
class(df[["target"]])
## [1] "factor"
library(caret)
# Score every predictor against the label with caret's filter method. The
# label is the last column, so all columns before it are the predictors.
roc_imp <- filterVarImp(x = df[, seq_len(ncol(df) - 1)], y = df[["target"]])
# Flatten into plain columns, moving the row names into `Feature`.
roc_imp <- data.frame(
  Feature = rownames(roc_imp),
  Hit = roc_imp[["Hit"]],
  Miss = roc_imp[["Miss"]]
)
print(roc_imp)
## Feature Hit Miss
## 1 danceability 0.6957654 0.6957654
## 2 energy 0.5856455 0.5856455
## 3 key 0.5054254 0.5054254
## 4 loudness 0.6429538 0.6429538
## 5 mode 0.5367100 0.5367100
## 6 speechiness 0.5751621 0.5751621
## 7 acousticness 0.5970058 0.5970058
## 8 instrumentalness 0.7343641 0.7343641
## 9 liveness 0.5318075 0.5318075
## 10 valence 0.6393925 0.6393925
## 11 tempo 0.5221276 0.5221276
## 12 duration_ms 0.5124104 0.5124104
## 13 time_signature 0.5335779 0.5335779
## 14 chorus_hit 0.5186910 0.5186910
## 15 sections 0.5129107 0.5129107
## 16 genre 0.6103773 0.6103773
## 17 popularity 0.7324852 0.7324852
# Horizontal bar chart of the `Hit` importance score for the whole dataset,
# with features sorted by score.
plot_ <- ggplot(roc_imp, aes(x = reorder(Feature, Hit), y = Hit)) +
  geom_col(fill = "burlywood") +
  coord_flip() +
  labs(x = "", y = "Importance",
       title = "Features influencing Hit of a song") +
  theme_light(base_size = 20) +
  theme(plot.title = element_text(size = 18))
print(plot_)

# Build the 1960s modelling table: every predictor column followed by
# `target` as the last column. The `X` index and `target` columns are
# removed by name rather than by position (-1, -16), so a change in column
# order cannot silently drop the wrong column.
df <- sixties
Y <- df$target
df <- df[, !(names(df) %in% c("X", "target")), drop = FALSE]
df <- cbind(df, target = Y)
# Lookup table from the numeric target code (as a string) to its label.
trans <- c("Miss", "Hit")
names(trans) <- c(0, 1)
# Overwrite `target` in place with the label as a factor, then preview.
df$target <- as.factor(trans[as.character(df$target)])
head(df)
## danceability energy key loudness mode speechiness acousticness
## 4814 0.417 0.620 3 0.7836 1 0.0403 0.490
## 4815 0.498 0.505 3 0.6940 1 0.0337 0.018
## 4816 0.657 0.649 5 0.6767 1 0.0380 0.846
## 4817 0.590 0.545 7 0.7018 0 0.1040 0.706
## 4818 0.515 0.765 11 0.8630 0 0.1240 0.857
## 4819 0.697 0.673 0 0.7299 1 0.0266 0.714
## instrumentalness liveness valence tempo duration_ms time_signature
## 4814 0.0000 0.0779 0.845 0.7690 0.0381 3
## 4815 0.1070 0.1760 0.797 0.4217 0.0478 4
## 4816 0.0000 0.1190 0.908 0.4802 0.0503 4
## 4817 0.0246 0.0610 0.967 0.4374 0.0344 4
## 4818 0.0009 0.2130 0.906 0.4748 0.0555 4
## 4819 0.9190 0.1220 0.778 0.4644 0.0367 4
## chorus_hit sections genre popularity target
## 4814 0.0761 9 2 23 Hit
## 4815 0.1127 10 12 63 Miss
## 4816 0.0859 12 2 19 Miss
## 4817 0.0571 8 3 67 Miss
## 4818 0.0503 14 3 54 Miss
## 4819 0.1512 7 5 70 Miss
# Confirm that the label column is a factor before scoring the predictors.
class(df[["target"]])
## [1] "factor"
library(caret)
# Score every 1960s predictor against the label with caret's filter method.
# The label is the last column, so all columns before it are the predictors.
roc_imp <- filterVarImp(x = df[, seq_len(ncol(df) - 1)], y = df[["target"]])
# Flatten into plain columns, moving the row names into `Feature`.
roc_imp <- data.frame(
  Feature = rownames(roc_imp),
  Hit = roc_imp[["Hit"]],
  Miss = roc_imp[["Miss"]]
)
print(roc_imp)
## Feature Hit Miss
## 1 danceability 0.6316814 0.6316814
## 2 energy 0.6742654 0.6742654
## 3 key 0.5104408 0.5104408
## 4 loudness 0.6680412 0.6680412
## 5 mode 0.5833140 0.5833140
## 6 speechiness 0.6107185 0.6107185
## 7 acousticness 0.6813122 0.6813122
## 8 instrumentalness 0.6959315 0.6959315
## 9 liveness 0.5128090 0.5128090
## 10 valence 0.6790956 0.6790956
## 11 tempo 0.5742807 0.5742807
## 12 duration_ms 0.5725549 0.5725549
## 13 time_signature 0.5132838 0.5132838
## 14 chorus_hit 0.5077792 0.5077792
## 15 sections 0.5364926 0.5364926
## 16 genre 0.6530690 0.6530690
## 17 popularity 0.5373768 0.5373768
# Horizontal bar chart of the `Hit` importance score for the 1960s subset,
# with features sorted by score.
plot_ <- ggplot(roc_imp, aes(x = reorder(Feature, Hit), y = Hit)) +
  geom_col(fill = "goldenrod") +
  coord_flip() +
  labs(x = "", y = "Importance",
       title = "Features influencing Hit of a song") +
  theme_light(base_size = 20) +
  theme(plot.title = element_text(size = 18))
print(plot_)

# Build the 1970s modelling table: every predictor column followed by
# `target` as the last column. The `X` index and `target` columns are
# removed by name rather than by position (-1, -16), so a change in column
# order cannot silently drop the wrong column.
df <- seventies
Y <- df$target
df <- df[, !(names(df) %in% c("X", "target")), drop = FALSE]
df <- cbind(df, target = Y)
# Lookup table from the numeric target code (as a string) to its label.
trans <- c("Miss", "Hit")
names(trans) <- c(0, 1)
# Overwrite `target` in place with the label as a factor, then preview.
df$target <- as.factor(trans[as.character(df$target)])
head(df)
## danceability energy key loudness mode speechiness acousticness
## 12715 0.669 0.547 1 0.7431 0 0.0576 0.3530
## 12716 0.291 0.300 1 0.6642 0 0.0326 0.9050
## 12717 0.355 0.968 7 0.8409 1 0.1890 0.0504
## 12718 0.598 0.891 6 0.7047 1 0.0552 0.0183
## 12719 0.440 0.376 5 0.7229 1 0.0482 0.3480
## 38470 0.618 0.167 7 0.6132 1 0.0451 0.8280
## instrumentalness liveness valence tempo duration_ms time_signature
## 12715 0.000 0.0515 0.730 0.5429 0.0303 4
## 12716 0.492 0.1180 0.039 0.3132 0.0513 4
## 12717 0.000 0.1300 0.768 0.7509 0.0124 3
## 12718 0.836 0.5790 0.826 0.5980 0.0337 4
## 12719 0.000 0.0815 0.358 0.5403 0.0489 4
## 38470 0.000 0.1970 0.651 0.7288 0.0283 4
## chorus_hit sections genre popularity target
## 12715 0.0665 6 3 54 Miss
## 12716 0.1018 9 6 5 Miss
## 12717 0.0582 6 9 42 Miss
## 12718 0.1284 7 3 49 Miss
## 12719 0.0806 12 1 40 Hit
## 38470 0.1345 7 2 68 Miss
# Confirm that the label column is a factor before scoring the predictors.
class(df[["target"]])
## [1] "factor"
library(caret)
# Score every 1970s predictor against the label with caret's filter method.
# The label is the last column, so all columns before it are the predictors.
roc_imp <- filterVarImp(x = df[, seq_len(ncol(df) - 1)], y = df[["target"]])
# Flatten into plain columns, moving the row names into `Feature`.
roc_imp <- data.frame(
  Feature = rownames(roc_imp),
  Hit = roc_imp[["Hit"]],
  Miss = roc_imp[["Miss"]]
)
print(roc_imp)
## Feature Hit Miss
## 1 danceability 0.6563745 0.6563745
## 2 energy 0.6343077 0.6343077
## 3 key 0.5119896 0.5119896
## 4 loudness 0.6302515 0.6302515
## 5 mode 0.5418491 0.5418491
## 6 speechiness 0.6077975 0.6077975
## 7 acousticness 0.6550108 0.6550108
## 8 instrumentalness 0.6886291 0.6886291
## 9 liveness 0.5331499 0.5331499
## 10 valence 0.6398434 0.6398434
## 11 tempo 0.5196391 0.5196391
## 12 duration_ms 0.5527932 0.5527932
## 13 time_signature 0.5427141 0.5427141
## 14 chorus_hit 0.5023828 0.5023828
## 15 sections 0.5312617 0.5312617
## 16 genre 0.6069625 0.6069625
## 17 popularity 0.6978207 0.6978207
# Horizontal bar chart of the `Hit` importance score for the 1970s subset,
# with features sorted by score.
plot_ <- ggplot(roc_imp, aes(x = reorder(Feature, Hit), y = Hit)) +
  geom_col(fill = "yellowgreen") +
  coord_flip() +
  labs(x = "", y = "Importance",
       title = "Features influencing Hit of a song") +
  theme_light(base_size = 20) +
  theme(plot.title = element_text(size = 18))
print(plot_)

# Build the 1980s modelling table: every predictor column followed by
# `target` as the last column. The `X` index and `target` columns are
# removed by name rather than by position (-1, -16), so a change in column
# order cannot silently drop the wrong column.
df <- eighties
Y <- df$target
df <- df[, !(names(df) %in% c("X", "target")), drop = FALSE]
df <- cbind(df, target = Y)
# Lookup table from the numeric target code (as a string) to its label.
trans <- c("Miss", "Hit")
names(trans) <- c(0, 1)
# Overwrite `target` in place with the label as a factor, then preview.
df$target <- as.factor(trans[as.character(df$target)])
head(df)
## danceability energy key loudness mode speechiness acousticness
## 19540 0.509 0.277 6 0.6591 1 0.0495 0.827000
## 19541 0.716 0.753 2 0.8221 1 0.0286 0.162000
## 1212 0.360 0.542 5 0.6674 1 0.0339 0.368000
## 19542 0.656 0.512 7 0.7053 1 0.0290 0.585000
## 19543 0.642 0.889 2 0.8233 0 0.0494 0.375000
## 19544 0.296 0.547 4 0.6441 0 0.0327 0.000291
## instrumentalness liveness valence tempo duration_ms time_signature
## 19540 0.0021 0.0756 0.640 0.4190 0.0353 4
## 19541 0.0306 0.0831 0.561 0.4976 0.0498 4
## 1212 0.1650 0.1160 0.803 0.4839 0.1034 4
## 19542 0.0000 0.0720 0.880 0.4039 0.0343 3
## 19543 0.0000 0.1800 0.764 0.6766 0.0354 4
## 19544 0.0136 0.3720 0.490 0.6162 0.0503 4
## chorus_hit sections genre popularity target
## 19540 0.1581 7 2 31 Miss
## 19541 0.1332 11 12 46 Hit
## 1212 0.0701 17 3 47 Miss
## 19542 0.1177 7 2 61 Miss
## 19543 0.0776 7 3 37 Hit
## 19544 0.1224 11 9 42 Miss
# Confirm that the label column is a factor before scoring the predictors.
class(df[["target"]])
## [1] "factor"
library(caret)
# Score every 1980s predictor against the label with caret's filter method.
# The label is the last column, so all columns before it are the predictors.
roc_imp <- filterVarImp(x = df[, seq_len(ncol(df) - 1)], y = df[["target"]])
# Flatten into plain columns, moving the row names into `Feature`.
roc_imp <- data.frame(
  Feature = rownames(roc_imp),
  Hit = roc_imp[["Hit"]],
  Miss = roc_imp[["Miss"]]
)
print(roc_imp)
## Feature Hit Miss
## 1 danceability 0.6974345 0.6974345
## 2 energy 0.5863365 0.5863365
## 3 key 0.5088141 0.5088141
## 4 loudness 0.6246034 0.6246034
## 5 mode 0.5208454 0.5208454
## 6 speechiness 0.6345586 0.6345586
## 7 acousticness 0.6028990 0.6028990
## 8 instrumentalness 0.6791653 0.6791653
## 9 liveness 0.5778596 0.5778596
## 10 valence 0.6458995 0.6458995
## 11 tempo 0.5345981 0.5345981
## 12 duration_ms 0.5869931 0.5869931
## 13 time_signature 0.5366955 0.5366955
## 14 chorus_hit 0.5029713 0.5029713
## 15 sections 0.5599948 0.5599948
## 16 genre 0.5967002 0.5967002
## 17 popularity 0.7341685 0.7341685
# Horizontal bar chart of the `Hit` importance score for the 1980s subset,
# with features sorted by score.
plot_ <- ggplot(roc_imp, aes(x = reorder(Feature, Hit), y = Hit)) +
  geom_col(fill = "yellowgreen") +
  coord_flip() +
  labs(x = "", y = "Importance",
       title = "Features influencing Hit of a song") +
  theme_light(base_size = 20) +
  theme(plot.title = element_text(size = 18))
print(plot_)

# Build the 1990s modelling table: every predictor column followed by
# `target` as the last column. The `X` index and `target` columns are
# removed by name rather than by position (-1, -16), so a change in column
# order cannot silently drop the wrong column.
df <- nineties
Y <- df$target
df <- df[, !(names(df) %in% c("X", "target")), drop = FALSE]
df <- cbind(df, target = Y)
# Lookup table from the numeric target code (as a string) to its label.
trans <- c("Miss", "Hit")
names(trans) <- c(0, 1)
# Overwrite `target` in place with the label as a factor, then preview.
df$target <- as.factor(trans[as.character(df$target)])
head(df)
## danceability energy key loudness mode speechiness acousticness
## 25317 0.527 0.316 1 0.6318 1 0.0310 6.93e-01
## 25318 0.738 0.541 1 0.8259 1 0.0311 5.59e-01
## 25319 0.736 0.419 0 0.7282 1 0.0300 6.93e-01
## 25320 0.565 0.594 5 0.6824 1 0.0646 6.55e-01
## 25321 0.513 0.760 4 0.7392 1 0.0355 1.72e-05
## 25322 0.166 0.985 2 0.8439 1 0.1190 1.47e-01
## instrumentalness liveness valence tempo duration_ms time_signature
## 25317 0.0070 0.1680 0.543 0.4814 0.0346 4
## 25318 0.0000 0.0492 0.309 0.5558 0.0896 4
## 25319 0.4950 0.0809 0.265 0.3893 0.0535 4
## 25320 0.9260 0.6750 0.763 0.4731 0.0868 4
## 25321 0.0034 0.1530 0.961 0.6344 0.1000 4
## 25322 0.1790 0.5400 0.609 0.7043 0.0122 4
## chorus_hit sections genre popularity target
## 25317 0.1244 6 2 65 Miss
## 25318 0.0743 16 1 59 Hit
## 25319 0.0971 9 3 44 Miss
## 25320 0.1870 10 3 9 Miss
## 25321 0.0590 20 9 36 Miss
## 25322 0.0649 4 9 23 Miss
# Confirm that the label column is a factor before scoring the predictors.
class(df[["target"]])
## [1] "factor"
library(caret)
# Score every 1990s predictor against the label with caret's filter method.
# The label is the last column, so all columns before it are the predictors.
roc_imp <- filterVarImp(x = df[, seq_len(ncol(df) - 1)], y = df[["target"]])
# Flatten into plain columns, moving the row names into `Feature`.
roc_imp <- data.frame(
  Feature = rownames(roc_imp),
  Hit = roc_imp[["Hit"]],
  Miss = roc_imp[["Miss"]]
)
print(roc_imp)
## Feature Hit Miss
## 1 danceability 0.7604027 0.7604027
## 2 energy 0.5987431 0.5987431
## 3 key 0.5298861 0.5298861
## 4 loudness 0.6687709 0.6687709
## 5 mode 0.5086957 0.5086957
## 6 speechiness 0.5533268 0.5533268
## 7 acousticness 0.6363279 0.6363279
## 8 instrumentalness 0.7348142 0.7348142
## 9 liveness 0.5619709 0.5619709
## 10 valence 0.5932394 0.5932394
## 11 tempo 0.5203518 0.5203518
## 12 duration_ms 0.6409588 0.6409588
## 13 time_signature 0.5335507 0.5335507
## 14 chorus_hit 0.5251889 0.5251889
## 15 sections 0.5976546 0.5976546
## 16 genre 0.6159086 0.6159086
## 17 popularity 0.7501319 0.7501319
# Horizontal bar chart of the per-feature importance stored in roc_imp$Hit,
# with the features sorted by that score.
plot_ <- ggplot(roc_imp, aes(x = reorder(Feature, Hit), y = Hit)) +
  geom_bar(stat = "identity", fill = "gold2") +
  coord_flip() +
  labs(
    x = "",
    y = "Importance",
    title = "Features influencing Hit of a song"
  ) +
  theme_light(base_size = 20) +
  # Must follow theme_light(), which would otherwise reset the title size.
  theme(plot.title = element_text(size = 18))
plot_

# Modelling frame for the 2000s: drop the first column (the X index), then
# move `target` to the last position.
df <- twentieth[, -1]
Y <- df$target
# With X removed, column 16 is `target`; it is re-attached at the end.
df <- cbind(df[, -16], target = Y)
# Lookup table from the 0/1 target codes to readable class labels.
trans <- setNames(c("Miss", "Hit"), c(0, 1))
df$target <- as.factor(trans[as.character(df$target)])
head(df)
## danceability energy key loudness mode speechiness acousticness
## 2765 0.578 0.471 4 0.7922 1 0.0289 3.68e-01
## 29677 0.704 0.854 10 0.8260 0 0.1830 1.85e-02
## 29678 0.162 0.836 9 0.8726 1 0.0473 1.11e-04
## 29679 0.188 0.994 4 0.8587 1 0.1660 7.39e-06
## 29680 0.630 0.764 2 0.8472 1 0.0275 3.63e-01
## 29681 0.726 0.837 11 0.7931 0 0.0965 3.73e-01
## instrumentalness liveness valence tempo duration_ms time_signature
## 2765 0.0000 0.159 0.532 0.5512 0.0437 4
## 29677 0.0000 0.148 0.688 0.3852 0.0547 4
## 29678 0.0046 0.174 0.300 0.3602 0.0779 4
## 29679 0.0784 0.192 0.333 0.6149 0.0579 4
## 29680 0.0000 0.125 0.631 0.4643 0.0430 4
## 29681 0.2680 0.136 0.969 0.5606 0.0427 4
## chorus_hit sections genre popularity target
## 2765 0.0713 13 2 62 Hit
## 29677 0.0958 10 3 60 Hit
## 29678 0.1508 13 9 46 Miss
## 29679 0.1353 9 9 0 Miss
## 29680 0.0522 10 2 70 Hit
## 29681 0.0653 10 7 3 Miss
class(df$target)
## [1] "factor"
library(caret)
# Score every predictor (all columns except the trailing `target`) against the
# Hit/Miss label. NOTE(review): per caret's documentation filterVarImp gives a
# per-predictor ROC-based score for classification -- confirm.
roc_imp <- filterVarImp(x = df[, seq_len(ncol(df) - 1)], y = df$target)
# Flatten to a plain table with the feature name as an ordinary column.
roc_imp <- data.frame(
  Feature = row.names(roc_imp),
  Hit = roc_imp[["Hit"]],
  Miss = roc_imp[["Miss"]]
)
roc_imp
## Feature Hit Miss
## 1 danceability 0.7593430 0.7593430
## 2 energy 0.5222120 0.5222120
## 3 key 0.5030401 0.5030401
## 4 loudness 0.6833110 0.6833110
## 5 mode 0.5459809 0.5459809
## 6 speechiness 0.5308738 0.5308738
## 7 acousticness 0.5237099 0.5237099
## 8 instrumentalness 0.8077563 0.8077563
## 9 liveness 0.5327838 0.5327838
## 10 valence 0.6618635 0.6618635
## 11 tempo 0.5273183 0.5273183
## 12 duration_ms 0.5473233 0.5473233
## 13 time_signature 0.5426745 0.5426745
## 14 chorus_hit 0.5452337 0.5452337
## 15 sections 0.5180511 0.5180511
## 16 genre 0.6123958 0.6123958
## 17 popularity 0.8665786 0.8665786
# Horizontal bar chart of the per-feature importance stored in roc_imp$Hit,
# with the features sorted by that score.
plot_ <- ggplot(roc_imp, aes(x = reorder(Feature, Hit), y = Hit)) +
  geom_bar(stat = "identity", fill = "yellow2") +
  coord_flip() +
  labs(
    x = "",
    y = "Importance",
    title = "Features influencing Hit of a song"
  ) +
  theme_light(base_size = 20) +
  # Must follow theme_light(), which would otherwise reset the title size.
  theme(plot.title = element_text(size = 18))
plot_

# Modelling frame for the 2010s: drop the first column (the X index), then
# move `target` to the last position.
df <- twentieth_1[, -1]
Y <- df$target
# With X removed, column 16 is `target`; it is re-attached at the end.
df <- cbind(df[, -16], target = Y)
# Lookup table from the 0/1 target codes to readable class labels.
trans <- c("0" = "Miss", "1" = "Hit")
df$target <- as.factor(trans[as.character(df$target)])
head(df)
## danceability energy key loudness mode speechiness acousticness
## 33846 0.741 0.626 1 0.8383 0 0.0886 0.02000
## 33847 0.447 0.247 5 0.6527 0 0.0346 0.87100
## 3677 0.550 0.415 9 0.8056 0 0.0520 0.16100
## 33848 0.502 0.648 0 0.8218 0 0.0527 0.00513
## 40060 0.807 0.887 1 0.8559 1 0.2750 0.00381
## 33849 0.482 0.873 0 0.8700 1 0.0853 0.01110
## instrumentalness liveness valence tempo duration_ms time_signature
## 33846 0.000 0.0828 0.706 0.4475 0.0417 4
## 33847 0.814 0.0946 0.250 0.6441 0.0389 3
## 3677 0.000 0.1080 0.274 0.7127 0.0458 4
## 33848 0.000 0.2040 0.291 0.3804 0.0428 4
## 40060 0.000 0.3910 0.780 0.6649 0.0311 4
## 33849 0.000 0.4090 0.737 0.6838 0.0479 4
## chorus_hit sections genre popularity target
## 33846 0.0951 10 3 81 Hit
## 33847 0.0766 9 6 42 Miss
## 3677 0.1036 9 1 77 Hit
## 33848 0.0682 7 9 57 Miss
## 40060 0.0577 8 3 68 Hit
## 33849 0.0743 12 9 80 Hit
class(df$target)
## [1] "factor"
library(caret)
# Score every predictor (all columns except the trailing `target`) against the
# Hit/Miss label. NOTE(review): per caret's documentation filterVarImp gives a
# per-predictor ROC-based score for classification -- confirm.
roc_imp <- filterVarImp(x = df[, seq_len(ncol(df) - 1)], y = df$target)
# Flatten to a plain table with the feature name as an ordinary column.
roc_imp <- data.frame(
  Feature = row.names(roc_imp),
  Hit = roc_imp[["Hit"]],
  Miss = roc_imp[["Miss"]]
)
roc_imp
## Feature Hit Miss
## 1 danceability 0.7102826 0.7102826
## 2 energy 0.5283174 0.5283174
## 3 key 0.5032668 0.5032668
## 4 loudness 0.6614895 0.6614895
## 5 mode 0.5153173 0.5153173
## 6 speechiness 0.5223400 0.5223400
## 7 acousticness 0.5319851 0.5319851
## 8 instrumentalness 0.8239553 0.8239553
## 9 liveness 0.5070159 0.5070159
## 10 valence 0.6236275 0.6236275
## 11 tempo 0.5256137 0.5256137
## 12 duration_ms 0.5786638 0.5786638
## 13 time_signature 0.5398308 0.5398308
## 14 chorus_hit 0.5392629 0.5392629
## 15 sections 0.5352752 0.5352752
## 16 genre 0.5741890 0.5741890
## 17 popularity 0.8974934 0.8974934
# Horizontal bar chart of the per-feature importance stored in roc_imp$Hit,
# with the features sorted by that score.
plot_ <- ggplot(roc_imp, aes(x = reorder(Feature, Hit), y = Hit)) +
  geom_bar(stat = "identity", fill = "orange") +
  coord_flip() +
  labs(
    x = "",
    y = "Importance",
    title = "Features influencing Hit of a song"
  ) +
  theme_light(base_size = 20) +
  # Must follow theme_light(), which would otherwise reset the title size.
  theme(plot.title = element_text(size = 18))
plot_

Analyzing the trend of popularity of Genres over
Decades
# Count the hit songs (target == 1) in each genre.
#
# data: data frame with a `target` column and a `genre` column holding the
#       genre codes 1..12.
# Returns a data frame with one row per genre code present in `data`:
#   Genre - factor over the 12 genre names (code 1 = "ballad", ..., 12 = "rock")
#   Count - number of rows in that genre whose target equals 1
genre_count <- function(data) {
  # Genre names in code order: position i is the label for code i.
  genre_labels <- c(
    "ballad", "country", "dance pop", "edm", "indie", "jazz",
    "karaoke", "opm", "punk", "rap", "rb", "rock"
  )
  hits_per_genre <- aggregate(
    data$target,
    by = list(data$genre),
    FUN = function(flags) sum(flags == 1)
  )
  names(hits_per_genre) <- c("Genre", "Count")
  # Replace the numeric codes with their names.
  hits_per_genre$Genre <- factor(
    hits_per_genre$Genre,
    levels = 1:12,
    labels = genre_labels
  )
  hits_per_genre
}
# Draw a plotly pie chart of how the hit songs of one decade split across genres.
#
# data:   data frame of songs, passed straight to genre_count()
# decade: text used as the chart title
# Returns the plotly figure.
genre_analysis <- function(data, decade) {
  # Attached here, so plotly and scales stay on the search path afterwards.
  library(plotly)
  library(scales)
  genre_data <- genre_count(data)
  # Formatted share per genre; kept on the data frame, although the plot_ly()
  # call below does not reference it (the slice labels come from
  # textinfo = 'percent').
  genre_data$Percent <- percent(genre_data$Count / sum(genre_data$Count))
  # One slice colour per genre.
  slice_colors <- c(
    '#6d3678', '#368039', '#f7cbe4', '#CC79A7', '#84dbb2', '#c96da0',
    '#aadb84', '#73356d', '#84cf88', '#ba6dc9', '#abc96d', '#a7256c'
  )
  pie <- plot_ly(
    data = genre_data,
    values = ~Count,
    labels = ~Genre,
    textposition = "outside",
    textinfo = 'percent',
    hoverinfo = 'label',
    outsidetextfont = list(color = 'midnightblue'),
    marker = list(
      colors = slice_colors,
      line = list(color = "white", width = 1)
    ),
    type = "pie"
  )
  pie %>% layout(title = decade)
}
# One genre pie chart per decade, in chronological order. Each call is left as
# its own top-level expression so that the returned figure is auto-printed.
genre_analysis(sixties,"1960-70")
genre_analysis(seventies,"1970-80")
genre_analysis(eighties,"1980-90")
genre_analysis(nineties,"1990-2000")
genre_analysis(twentieth,"2000-10")
genre_analysis(twentieth_1,"2010-20")
Analyzing the trend of values of different features over
decades for Hit Songs
# Restrict each decade to its hit songs (target == 1).
sixties_hit <- sixties[sixties$target == 1, ]
seventies_hit <- seventies[seventies$target == 1, ]
eighties_hit <- eighties[eighties$target == 1, ]
nineties_hit <- nineties[nineties$target == 1, ]
twentieth_hit <- twentieth[twentieth$target == 1, ]
twentieth_1_hit <- twentieth_1[twentieth_1$target == 1, ]

# Hit subsets in chronological order; every per-decade vector below follows
# this order, which is also the order of the labels in `x`.
hit_decades <- list(
  sixties_hit, seventies_hit, eighties_hit,
  nineties_hit, twentieth_hit, twentieth_1_hit
)

# Mean of one column in each hit subset, as a numeric vector with one value
# per decade.
hit_mean <- function(feature) {
  vapply(hit_decades, function(songs) mean(songs[[feature]]), numeric(1))
}

artist_pop <- hit_mean("popularity")
artist_pop
## [1] 48.60588 55.32964 59.25217 58.95688 67.19346 74.14223
instrumental <- hit_mean("instrumentalness")
dance <- hit_mean("danceability")
energy <- hit_mean("energy")
loud <- hit_mean("loudness")
mode <- hit_mean("mode")
speech <- hit_mean("speechiness")
acoustic <- hit_mean("acousticness")
live <- hit_mean("liveness")
valence <- hit_mean("valence")
tempo <- hit_mean("tempo")
duration <- hit_mean("duration_ms")
time_sig <- hit_mean("time_signature")
# NOTE(review): `genre` holds category codes (genre_count() maps 1..12 to
# names), so this mean is only a rough indicator -- confirm it is wanted.
genre <- hit_mean("genre")

# Decade labels for the x axis, in the same order as hit_decades.
x <- c("1960-70", "1970-80", "1980-90", "1990-2000", "2000-10", "2010-20")
# One bar colour per decade.
hit_palette <- c(
  "#DDA0DD", "#BA55D3", "#9370DB",
  "#8A2BE2", "#9400D3", "#4B0082"
)

# Bar chart of one per-decade vector: decades on the x axis, `values` as bar
# heights, `title` as the chart title. Returns the plotly figure.
hit_bar <- function(values, title) {
  data1 <- data.frame(x = x, y = values)
  fig <- plot_ly(data1, x = ~x, y = ~y, type = 'bar',
                 marker = list(color = hit_palette))
  fig %>% layout(title = title,
                 xaxis = list(title = "Decades"),
                 yaxis = list(title = ""))
}

# One figure per feature. Every figure is built from its own data: previously
# the "Loudness" figure was derived from fig1 (popularity bars), and the
# liveness figure was overwritten by the valence one and never shown. There
# are 14 features, hence fig1..fig14.
fig1 <- hit_bar(artist_pop, "Popularity")
fig2 <- hit_bar(instrumental, "Instrumentalness")
fig3 <- hit_bar(dance, "Dance")
fig4 <- hit_bar(energy, "Energy")
fig5 <- hit_bar(loud, "Loudness")
fig6 <- hit_bar(mode, "Mode")
fig7 <- hit_bar(speech, "Speechiness")
fig8 <- hit_bar(acoustic, "Acousticness")
fig9 <- hit_bar(live, "Liveliness")
fig10 <- hit_bar(valence, "Valence")
fig11 <- hit_bar(tempo, "Tempo")
fig12 <- hit_bar(duration, "Duration")
fig13 <- hit_bar(time_sig, "Time signature")
fig14 <- hit_bar(genre, "Genre")

# Each figure is its own top-level expression so that it is auto-printed.
fig1
fig2
fig3
fig4
fig5
fig6
fig7
fig8
fig9
fig10
fig11
fig12
fig13
fig14
Analyzing the trend of values of different features over
decades for Miss Songs
# Restrict each decade to its miss songs (target == 0).
sixties_miss <- sixties[sixties$target == 0, ]
seventies_miss <- seventies[seventies$target == 0, ]
eighties_miss <- eighties[eighties$target == 0, ]
nineties_miss <- nineties[nineties$target == 0, ]
twentieth_miss <- twentieth[twentieth$target == 0, ]
twentieth_1_miss <- twentieth_1[twentieth_1$target == 0, ]

# Miss subsets in chronological order; every per-decade vector below follows
# this order, which is also the order of the labels in `x`.
miss_decades <- list(
  sixties_miss, seventies_miss, eighties_miss,
  nineties_miss, twentieth_miss, twentieth_1_miss
)

# Mean of one column in each miss subset, as a numeric vector with one value
# per decade.
miss_mean <- function(feature) {
  vapply(miss_decades, function(songs) mean(songs[[feature]]), numeric(1))
}

artist_pop <- miss_mean("popularity")
artist_pop
## [1] 45.00023 43.33299 46.02287 43.52101 41.85184 46.79869
instrumental <- miss_mean("instrumentalness")
dance <- miss_mean("danceability")
energy <- miss_mean("energy")
loud <- miss_mean("loudness")
mode <- miss_mean("mode")
speech <- miss_mean("speechiness")
acoustic <- miss_mean("acousticness")
live <- miss_mean("liveness")
valence <- miss_mean("valence")
tempo <- miss_mean("tempo")
duration <- miss_mean("duration_ms")
time_sig <- miss_mean("time_signature")
# NOTE(review): `genre` holds category codes (genre_count() maps 1..12 to
# names), so this mean is only a rough indicator -- confirm it is wanted.
genre <- miss_mean("genre")

# Decade labels for the x axis, in the same order as miss_decades.
x <- c("1960-70", "1970-80", "1980-90", "1990-2000", "2000-10", "2010-20")
# One bar colour per decade.
miss_palette <- c(
  "#FFC0CB", "#FFB6C1", "#FF69B4",
  "#DB7093", "#C71585", "#FF1493"
)

# Bar chart of one per-decade vector: decades on the x axis, `values` as bar
# heights, `title` as the chart title. Returns the plotly figure.
miss_bar <- function(values, title) {
  data1 <- data.frame(x = x, y = values)
  fig <- plot_ly(data1, x = ~x, y = ~y, type = 'bar',
                 marker = list(color = miss_palette))
  fig %>% layout(title = title,
                 xaxis = list(title = "Decades"),
                 yaxis = list(title = ""))
}

# One figure per feature. Every figure is built from its own data: previously
# the "Loudness" figure was derived from fig1 (popularity bars), and the
# liveness figure was overwritten by the valence one and never shown. There
# are 14 features, hence fig1..fig14.
fig1 <- miss_bar(artist_pop, "Popularity")
fig2 <- miss_bar(instrumental, "Instrumentalness")
fig3 <- miss_bar(dance, "Dance")
fig4 <- miss_bar(energy, "Energy")
fig5 <- miss_bar(loud, "Loudness")
fig6 <- miss_bar(mode, "Mode")
fig7 <- miss_bar(speech, "Speechiness")
fig8 <- miss_bar(acoustic, "Acousticness")
fig9 <- miss_bar(live, "Liveliness")
fig10 <- miss_bar(valence, "Valence")
fig11 <- miss_bar(tempo, "Tempo")
fig12 <- miss_bar(duration, "Duration")
fig13 <- miss_bar(time_sig, "Time signature")
fig14 <- miss_bar(genre, "Genre")

# Each figure is its own top-level expression so that it is auto-printed.
fig1
fig2
fig3
fig4
fig5
fig6
fig7
fig8
fig9
fig10
fig11
fig12
fig13
fig14